import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shap
import os
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from xgboost import XGBRegressor

# Seeds for the train/test split, keyed by the label that accompanies each one.
# NOTE(review): the labels O / OH / OOH presumably name the target being
# modelled -- confirm against the data set.
RANDOM_STATES = {
    "O": 822,
    "OH": 315,
    "OOH": 339,
}

# Label whose seed is in effect; change this to switch configuration.
ACTIVE_LABEL = "OOH"
random_state = RANDOM_STATES[ACTIVE_LABEL]

# --- Select predictors and target by column position -----------------------
# `data` is not created in this section; it is indexed with `.iloc`, so it is
# presumably a pandas DataFrame loaded elsewhere -- confirm.
feature_columns = [12, 14, 16, 17, 19, 20, 21, 22, 23, 24, 47, 48, 49, 50]
target_columns = [6]

X = data.iloc[:, feature_columns]
# A one-column frame yields an (n, 1) array; ravel() flattens it to 1-D.
y = data.iloc[:, target_columns].values.ravel()

# --- Hold out 25% of the rows for testing ----------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=random_state,
)

# --- Standardise features --------------------------------------------------
# The scaler is fitted on the training rows only, and the same fitted
# transform is then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- Fit and predict -------------------------------------------------------
# `model` is not created in this section; it must already exist and expose
# fit/predict.
model.fit(X_train_scaled, y_train)

y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

